---
title: 'The Blessings of Scarcity: The Cold War Origins of Smaller States Prosperity'
author: "Muhammad Ben Khalid and Steve L. Monroe"
date: 'Replication Code for Analysis in Manuscript (Oct. 2024)'
output: html_document
---


```{r setup, include=FALSE, echo=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```


```{r, }
## Table of Contents ########
## 1. Load Packages and Upload Data
## 2. Map of States in the Analysis (Figure 2)
## 3. H1: Early Independence Size and Post-Cold War Development (Table 1, Figure 3)
## 4. H2: Early Independence Size and Cold War Embedded Liberalism (Figure 4)

# note this analysis was run on R Studio Version:2023.12.1+402  

```


## 1. Load Packages and Upload Data
```{r, include = FALSE, echo=FALSE}


library(stargazer)
library(here)
library(dplyr)
library(foreign)
library(mediation)
library(lmtest)
library(tidyr)
library(ggplot2)
library(broom)
library(cowplot)
library(tidyr)
library(ggrepel)
library(countrycode)
library(xtable)
library(ivreg)
library(cowplot)
library(dotwhisker)
library(car)
library(maps)

# set working directory
here("")

# upload data
data <- read.csv("data_main.csv")

```


## 2. Map of States in the Analysis (Figure 2)
```{r fig.align='center', warning=FALSE, echo=FALSE}
# create a variable to identify states with average populations of less than one million
# between 1946 and 1975. we will use this variable to color small states in the mapl

data$small.population.1 <- ifelse(data$avg.pop.1946.75 < 1000000, 1, 0)

world <- map_data("world")

world$code <- countrycode(sourcevar = world$region, origin = "country.name", destination = "iso3c")

world <- left_join(world, data[, c("Country.Code", "small.population.1")], by = c("code" = "Country.Code"))
world <- world %>% 
  mutate(across(long, ~ifelse(code=='FJI', abs(.x), .x)))

world$small.population.1[is.na(world$small.population.1) == T] <- 2
world$cases <- ifelse(world$small.population.1 > 0, 
                                 "Case", "No Case")
region.lab.data <- world %>% 
  group_by(code) %>% 
  summarise(long = mean(long, na.rm = TRUE),
            lat = mean(lat, na.rm = TRUE),
            small.population.1 = first(small.population.1))

# Remove labels for some of the small states that are teritorially visible

smallo <- region.lab.data %>% 
  filter(small.population.1 == 1)
smallo <- smallo[smallo$code != "BWA",]
smallo <- smallo[smallo$code != "MRT",]
smallo <- smallo[smallo$code != "GUY",]
smallo <- smallo[smallo$code != "SUR",]
smallo <- smallo[smallo$code != "OMN",]
smallo <- smallo[smallo$code != "ARE",]
smallo <- smallo[smallo$code != "GAB",]
smallo <- smallo[smallo$code != "GNB",]
smallo <- smallo[smallo$code != "SWZ",]

map <- ggplot(data = world, 
        mapping = aes(x = long, y = lat, group = group, fill =   factor(small.population.1))) +
  geom_polygon(color = "gray80", size = 0.1) +
  coord_cartesian(ylim = c(-50, 90)) + 
  #labs(fill = "Small by Population") +
  theme_void() +
 scale_fill_manual(values = c("darkgrey", "darkgrey", "white"))

map + geom_text_repel(data = smallo, aes(x= long, y= lat, label= code), inherit.aes = FALSE, box.padding = 4, max.overlaps = Inf)+ theme(legend.position = "none")

figure2 <- map + geom_text_repel(data = smallo, aes(x= long, y= lat, label= code), inherit.aes = FALSE, box.padding = 3.5, max.overlaps = Inf)+theme(legend.position = "none")


# export figure2

```


## 3. Hypothesis 1: Historical Size and Post Cold War Development


\begin{table}[ht!] \centering 
  \caption{H1: Size at Independence and Post-Cold War Economic Development} 
  \label{tab:table1} 
  \resizebox{\textwidth}{!}{
```{r tidy=TRUE, tidy.opts=list(width.cutoff=65), warning=FALSE, echo=FALSE, results='asis'}

                   
reg1 <- lm (log.avg.gdp.92.20 ~ log.avg.pop.1946.75 + region, data = data)

# add geographic variables
reg2 <- lm (log.avg.gdp.92.20 ~ log.avg.pop.1946.75 +  log.pop.density.1961.75 + log.rugged + island +  log.reliance1 + log.malpct_aug1965.75+
              region, data = data)


reg3 <- lm (log.avg.gdp.92.20 ~ log.avg.pop.1946.75 +
              log.pop.density.1961.75 + 
              log.rugged + island +
              + log.reliance1 + log.malpct_aug1965.75 + 
               +log.avg.urbanization.75 + sov.threat2 + demo1946.75 +
              IndViol2 + uk + log.avg.aid +
              avg_EF.1946.75 + 
              region, data = data)


reg4 <- lm (hdi2019 ~ log.avg.pop.1946.75 +
              log.pop.density.1961.75 + 
              log.rugged + island +
              + log.reliance1 + log.malpct_aug1965.75 +
               +log.avg.urbanization.75 + sov.threat2 + demo1946.75 +
              IndViol2 + uk + log.avg.aid +
              avg_EF.1946.75 + 
              region, data = data)


reg5 <- lm (mortality.current ~ log.avg.pop.1946.75 +
              log.pop.density.1961.75 + 
              log.rugged + island +
              + log.reliance1 + log.malpct_aug1965.75 +
              +log.avg.urbanization.75 + sov.threat2 + demo1946.75 +
              IndViol2 + uk + log.avg.aid +
              avg_EF.1946.75 + 
              region, data = data)


#Calculating heteroscedasticity robust std errors
reg1_r <- coeftest(reg1, vcov. = vcovHC(reg1, type = "HC1"))
reg2_r <- coeftest(reg2, vcov. = vcovHC(reg2, type = "HC1"))
reg3_r <- coeftest(reg3, vcov. = vcovHC(reg3, type = "HC1"))
reg4_r <- coeftest(reg4, vcov. = vcovHC(reg4, type = "HC1"))
reg5_r <- coeftest(reg5, vcov. = vcovHC(reg5, type = "HC1"))


main_table1 <- stargazer(reg1, reg2, reg3, reg4, reg5,
                        align = TRUE, no.space = TRUE, type = 'text', df = FALSE, digits = 2,
                        font.size = "small", column.sep.width = "0.01pt", float = FALSE, header = FALSE, se = list(reg1_r[,"Std. Error"], reg2_r[,"Std. Error"], reg3_r[,"Std. Error"],  reg4_r[,"Std. Error"], reg5_r[,"Std. Error"]),
                        covariate.labels = c("Avg.Population (1946-75, logged)", "Pop. Density (1946-75, logged)", "Rugged (logged)", "Malaria Risk (1965-75, logged)", "Island", 
                                             "Reliance on Oil (1946-75, logged)", "Urbanization (1946-75, logged)", "Threat to Sovereignty", "Democracy (1946-75)",
                                             "Violence at Independence", "UK Colony", 
                                             "Aid Per Capita (1961-75, logged)", "Historical Ethnic Frac (1946-75)"), 
                        dep.var.labels=c("Log(Avg.GDPpc)","HDI", "Infant Mortality"), 
                        omit.stat = c("adj.rsq", "ser"), 
                        omit = c("regionEurope and Central Asia", "regionLatin America and The Caribbean", 
                                 "regionMiddle East and North Africa", "regionSouth Asia", 
                                 "regionSub-Saharan Africa"), 
                        add.lines = list(c("Region Fixed Effects",
                                                "Yes", "Yes", "Yes", "Yes", "Yes")),
                        column.labels= c("", "(1992 - 2020)", "", "(2019)", "(2019)"))
                        

# Hand code on to excel table


````


## 3. Figure 3 
\begin{figure}
  \caption{Coefficient plot using different measures of small states} 
  \label{fig:coefplot}
```{r fig.align='center', tidy=TRUE, tidy.opts=list(width.cutoff=65), warning=FALSE, echo=FALSE}


## Figure 3

start_col <- which(names(data) == "X1976")  # Starting column index
end_col <- which(names(data) == "X1991")  # Ending column index

data$log.avg.pop.1976.91 = log(rowMeans(data[, start_col:end_col], na.rm = TRUE))

start_col <- which(names(data) == "X1992")  # Starting column index
end_col <- which(names(data) == "X2020")  # Ending column index

data$log.avg.pop.1992.20 = log(rowMeans(data[, start_col:end_col], na.rm = TRUE))


# Compare growth across different measures of size


reg1 <- lm (log.avg.gdp.92.20 ~ log.avg.pop.1946.75 +
               log.pop.density.1961.75 + 
              log.rugged + island +
              + log.reliance1 + log.malpct_aug1965.75 +
                 +log.avg.urbanization.75 + sov.threat2 + demo1946.75 +
              IndViol2 + uk + log.avg.aid +
              avg_EF.1946.75 + 
              region, data = data)

reg1_out <-tidy(reg1)              

 
reg2 <- lm (log.avg.gdp.92.20 ~ log.avg.pop.1976.91 +
               log.pop.density.1961.75 + 
              log.rugged + island +
              + log.reliance1 + log.malpct_aug1965.75 +
                 +log.avg.urbanization.75 + sov.threat2 + demo1946.75 +
              IndViol2 + uk + log.avg.aid +
              avg_EF.1946.75 + 
              region, data = data)


reg2_out <-tidy(reg2) 

reg3 <- lm (log.avg.gdp.92.20 ~ log.avg.pop.1992.20 +
               log.pop.density.1961.75 + 
              log.rugged + island +
              + log.reliance1 + log.malpct_aug1965.75 +
                 +log.avg.urbanization.75 + sov.threat2 + demo1946.75 +
              IndViol2 + uk + log.avg.aid +
              avg_EF.1946.75 + 
              region, data = data)

reg3_out <-tidy(reg3)  


reg4 <- lm (log.avg.gdp.92.20 ~ log(X2020) +
               log.pop.density.1961.75 + 
              log.rugged + island +
              + log.reliance1 + log.malpct_aug1965.75 +
                 +log.avg.urbanization.75 + sov.threat2 + demo1946.75 +
              IndViol2 + uk + log.avg.aid +
              avg_EF.1946.75 + 
              region, data = data)

reg4_out <-tidy(reg4) 


reg1_r <- coeftest(reg1, vcov. = vcovHC(reg1, type = "HC1"))
reg2_r <- coeftest(reg2, vcov. = vcovHC(reg2, type = "HC1"))
reg3_r <- coeftest(reg3, vcov. = vcovHC(reg3, type = "HC1"))
reg4_r <- coeftest(reg4, vcov. = vcovHC(reg4, type = "HC1"))


reg1_out <- tidy(reg1_r)
reg2_out <- tidy(reg2_r)
reg3_out <- tidy(reg3_r)
reg4_out <- tidy(reg4_r)


regs_all <- rbind(reg1_out, reg2_out, reg3_out, reg4_out)


# changing names of variables
regs_all$term[regs_all$term == "log.avg.pop.1946.75" ] <- "Avg Pop (1946-1975, logged)"
regs_all$term[regs_all$term == "log.avg.pop.1976.91" ] <- "Avg Pop (1976-1991, logged)"
regs_all$term[regs_all$term == "log.avg.pop.1992.20" ] <- "Avg Pop (1992-2020, logged)"
regs_all$term[regs_all$term == "log(X2020)" ] <- "Pop (2020, logged)"


# Filtering to delete coefficients of regional dummies and intercept

regs_all <- regs_all[!(regs_all$term == "regionEurope and Central Asia" | regs_all$term == "regionLatin America and The Caribbean" | regs_all$term == "regionLatin America and The Caribbean" | regs_all$term == "regionMiddle East and North Africa" | regs_all$term == "regionNorth America" | regs_all$term == "regionSouth Asia" | regs_all$term == "regionSub-Saharan Africa"), ]

regs_all <- regs_all[!(regs_all$term == "(Intercept)" | regs_all$term == "log.avg.urbanization.75" | regs_all$term == "log.reliance1" | regs_all$term == "log.rugged" | regs_all$term == "island" | regs_all$term == "log.malpct_aug1965.75" | regs_all$term == "log.pop.density.1961.75"|  regs_all$term == "sov.threat2" | regs_all$term == "demo1946.75" |
regs_all$term == "IndViol2Violent" |  regs_all$term == "ukUK" |
regs_all$term == "log.reliance1" | regs_all$term == "log.avg.aid"
| regs_all$term == "avg_EF.1946.75"),]




# Re-orders regs_all so that Population at Independence 

regs_all$term <- factor(regs_all$term,
                   levels = c(
                    "Avg Pop (1946-1975, logged)", 
                    "Avg Pop (1976-1991, logged)",
                    "Avg Pop (1992-2020, logged)",
                    "Pop (2020, logged)"))



figure3 <- dwplot(regs_all,dot_args = list(color = "black"), 
  line_args = list(color = "black")) +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high, y = term), color = "black", height = 0) +
  geom_vline(xintercept = 0, linetype =3, size = 1.5, colour = "grey60") +
  theme_bw() +
  theme(legend.position = "NULL")

figure3

# Export Figure 3    


```



## 4. Hypothesis 2: Historical Size and Cold War Embedded Liberalism (Figure 4)


\begin{figure}
  \caption{Mediation Analysis of Cold War Embeddedness on Post-Cold War Prosperity} 
  \label{fig:coefplottrade}
```{r fig.align='center', tidy=TRUE, tidy.opts=list(width.cutoff=65), warning=FALSE, echo=FALSE}



data2 <- na.omit(data[, c("log.avg.gdp.92.20", "cw_embed", "log.avg.pop.1946.75",
                           "avg.rule.1996.19", "trade.global.1976.91",
                          "fragility_2_mean", "publicexp.imf.avg.1976.1995",
                          "log.pop.density.1961.75", "log.rugged", "island",
                          "log.reliance1", "log.malpct_aug1965.75",
                          "log.avg.urbanization.75", "sov.threat2",
                          "demo1946.75", "IndViol2", "uk", "log.avg.aid",
                          "avg_EF.1946.75", "region")])


data2$fragility_2_mean <- (data2$fragility_2_mean) / 100

reg1 <- lm (log.avg.gdp.92.20  ~ log.avg.pop.1946.75  +
               + log.pop.density.1961.75 + 
              log.rugged + island +
              + log.reliance1 + log.malpct_aug1965.75 +
                 +log.avg.urbanization.75 + sov.threat2 + demo1946.75 +
              IndViol2 + uk + log.avg.aid +
              avg_EF.1946.75 + 
              region, data = data2)


reg1_r <- coeftest(reg1, vcov. = vcovHC(reg1, type = "HC1"))
reg_out1 <- tidy(reg1_r)

reg_out1$term[reg_out1$term == "log.avg.pop.1946.75" ] <- "Total Effect \n (Post-Cold War Economic Development)"


# Add Cold War trade variable 

reg2 <- lm (log.avg.gdp.92.20  ~ log.avg.pop.1946.75  + cw_embed +
               + log.pop.density.1961.75 + 
              log.rugged + island +
              + log.reliance1 + log.malpct_aug1965.75 +
                 +log.avg.urbanization.75 + sov.threat2 + demo1946.75 +
              IndViol2 + uk + log.avg.aid +
              avg_EF.1946.75 + 
              region, data = data2)


reg2_r <- coeftest(reg2, vcov. = vcovHC(reg2, type = "HC1"))
reg_out2 <- tidy(reg2_r)

reg_out2$term[reg_out2$term == "log.avg.pop.1946.75" ] <- 	
"Direct Effect \n (Cold War EL Control)"



reg3 <- lm (avg.rule.1996.19 ~ log.avg.pop.1946.75  + 
               + log.pop.density.1961.75 + 
              log.rugged + island +
              + log.reliance1 + log.malpct_aug1965.75 +
                 +log.avg.urbanization.75 + sov.threat2 + demo1946.75 +
              IndViol2 + uk + log.avg.aid +
              avg_EF.1946.75 + 
              region, data = data2)


reg3_r <- coeftest(reg3, vcov. = vcovHC(reg3, type = "HC1"))
reg_out3 <- tidy(reg3_r)

reg_out3$term[reg_out3$term == "log.avg.pop.1946.75" ] <- 
  "Total Effect \n (Post-Cold War Economic Inclusion)"


reg4 <- lm (avg.rule.1996.19 ~ log.avg.pop.1946.75  + trade.global.1976.91+
               + log.pop.density.1961.75 + 
              log.rugged + island +
              + log.reliance1 + log.malpct_aug1965.75 +
                 +log.avg.urbanization.75 + sov.threat2 + demo1946.75 +
              IndViol2 + uk + log.avg.aid +
              avg_EF.1946.75 + 
              region, data = data2)


reg4_r <- coeftest(reg4, vcov. = vcovHC(reg4, type = "HC1"))
reg_out4 <- tidy(reg4_r)

reg_out4$term[reg_out4$term == "log.avg.pop.1946.75" ] <- 
  "Direct Effect \n (Cold War Open Trade Policy Control)"



reg5 <- lm (fragility_2_mean ~ log.avg.pop.1946.75  + 
               + log.pop.density.1961.75 + 
              log.rugged + island +
              + log.reliance1 + log.malpct_aug1965.75 +
                 +log.avg.urbanization.75 + sov.threat2 + demo1946.75 +
              IndViol2 + uk + log.avg.aid +
              avg_EF.1946.75 + 
              region, data = data2)


reg5_r <- coeftest(reg5, vcov. = vcovHC(reg5, type = "HC1"))
reg_out5 <- tidy(reg5_r)

reg_out5$term[reg_out5$term == "log.avg.pop.1946.75" ] <- 
  "Total Effect \n (Post-Cold War State Instability)"


reg6 <- lm (fragility_2_mean ~ log.avg.pop.1946.75  + publicexp.imf.avg.1976.1995 +
               + log.pop.density.1961.75 + 
              log.rugged + island +
              + log.reliance1 + log.malpct_aug1965.75 +
                 +log.avg.urbanization.75 + sov.threat2 + demo1946.75 +
              IndViol2 + uk + log.avg.aid +
              avg_EF.1946.75 + 
              region, data = data2)


reg6_r <- coeftest(reg6, vcov. = vcovHC(reg6, type = "HC1"))
reg_out6 <- tidy(reg6_r)

reg_out6$term[reg_out6$term == "log.avg.pop.1946.75" ] <- 
  "Direct Effect \n (Cold War Public Sector Size Control)"


regs_all <- rbind(reg_out1, reg_out2, reg_out3, reg_out4, reg_out5, reg_out6)
# changing names of variables
regs_all <- regs_all %>% filter (term == "Total Effect \n (Post-Cold War Economic Development)" | 
                           term == "Direct Effect \n (Cold War EL Control)" | term == "Total Effect \n (Post-Cold War Economic Inclusion)" | term == "Direct Effect \n (Cold War Open Trade Policy Control)" | term == "Total Effect \n (Post-Cold War State Instability)" |
              term == "Direct Effect \n (Cold War Public Sector Size Control)") 



# Re-orders regs_all so that APopulation at Independence 

figure4 <- dwplot(regs_all, dot_args = list(color = "black"), 
  line_args = list(color = "black")) + 
   geom_errorbarh(aes(xmin = conf.low, xmax = conf.high, y = term), color = "black", height = 0) +
  geom_vline(xintercept = 0, linetype =2, colour = "grey60") +
  theme_bw() +
  theme(legend.position = "NULL")

figure4
# Export Figure


```

